mata
mata drop *()
mata set matastrict on
mata set mataoptimize on
mata set matalnum off

// Copyright David Roodman 2005-11. May be distributed free.
// Mata code for xtabond2 version 2.9.4, 23 May 2011

real scalar xtabond2_mata() {
	real scalar artests, arlevels, steps, h, orthogonal, consopt, r, r2, small, robust, YInd, touseInd, onestepnonrobust,
		keep, j, j0, k, collapse, NumObs, NumObsEff, NumGroups, idInd, tInd, diffsargan, NumOtherInsts,
		FullInstSetDiffed, FullInstSetEq, MakeExtraInsts, Nnottouse, lag, NumIVOptions, NumGMMOptions, NumDiffSargans, NumInsts,
		N, T, NT, SystemGMM, RowsPerGroup, SystemHeight, j_GMM, j_IV, weights, wttot, rc, Nclust, svmat, tdelta
	real matrix tmp, Xi, X0, Y0, XVars, Subscripts, Z_IV, Z_GMM, SubscriptsStart, SubscriptsStep
	string scalar idName, tName, NumBaseVars, LevelArg, optionsArg, gmmstyle, touseName, bname, Vname, idSampleName, wtvarname, wtype, wexp
	string rowvector VarlistNames, XNames, InstOptTxt, GMMInstNames, IVInstNames
	string matrix Stripe
	real colvector p, p_IV, p_GMM, p_AllIV, p_AllGMM, p_System, touseVar, idVar, tVar, YVar, SortByIDEq, SortByEqID, Eq, nottouse, Fill, touse, Complete, ///
		wt, wt0, _wt, wtvar, clust
	pointer(real rowvector) matrix InstOptInd
	real rowvector VarlistInds, XInds, TiMinMax, DFm_keep, ivmz, ivpassthru, ivequation, gmmcollapse, gmmpassthru, gmmequation
	pointer(pointer rowvector) GMMInstParams, ThisInstParams, IVInstParams
	real colvector e1, e2, wli, Xwl, ZHw, b1, b2, Ze, psiw, ARz, ARp, A1diag
	real matrix S, D, ZXi, Zi, A1Ze, A2Ze, H, psit, V1, V2, A1, A2, App, V1robust, V2robust, sum_wwli, XZA, VXZA, m2VZXA, touse2,
		  diffsargans, X, Y, ZX, ZXp, ZY, gmmlaglimits, w, wl
	real scalar i, sig2, g, wHw, sarganDF, sargan, sarganp, hansen, hansenp, b, V, DFm, DFr, F, Fp, Wald, Waldp
	pointer (real matrix) pA, pV, pVrobust
	pointer (real matrix) colvector pZe1i, pei
	pointer (real colvector) pe, pwe
	pointer (real matrix function) pfnXform

	pragma unset NumIVOptions; pragma unset NumGMMOptions; pragma unset j_GMM; pragma unset XVars; pragma unset Z_IV; pragma unset InstOptTxt
	pragma unset GMMInstNames; pragma unset p; pragma unset touseVar; pragma unset idVar; pragma unset tVar; pragma unset YVar; pragma unset wtvar
	pragma unset InstOptInd; pragma unset gmmcollapse; pragma unset gmmpassthru; pragma unset gmmequation; pragma unset GMMInstParams
	pragma unset gmmlaglimits; pragma unset g

	if (favorspeed())
		printf(`"{txt}Favoring speed over space. To switch, type or click on {stata "mata: mata set matafavor space, perm"}.\n"')
	else
		printf(`"{txt}Favoring space over speed. To switch, type or click on {stata "mata: mata set matafavor speed, perm"}.\n"')
	
	if (st_nobs() <= 1) {
		printf("{err}No observations.\n")
		return(2000)
	}	

	LevelArg = st_local("level")

	stata("marksample touse")
	touseInd = st_varindex(touseName = st_local("touse"))
	stata("qui tsset") // prevents "not sorted" message
	if ((tName = st_global("_dta[tis]"))=="" | (idName = st_global("_dta[iis]"))=="") {
		printf("{err}You must tsset the data and specify the panel and time variables.\n")
		return (459)
	}
	tInd = st_varindex(tName)
	idInd = st_varindex(idName)
	tdelta = st_numscalar("c(stata_version)") >= 10 ? st_numscalar("r(tdelta)") : 1

	stata("markout "+ touseName + " " + idName)
	stata("bysort " + idName + ": egen " + (idSampleName=st_tempname()) + "=max(" + touseName + ")")

	st_view(touseVar, ., touseInd, idSampleName)

	if (any(touseVar)==0) {
		printf("{err}No observations.\n")
		return(2000)
	}

	st_view(tVar, ., tInd, idSampleName)
	st_view(idVar, ., idInd, idSampleName)
	if (missing(tVar)) {
		printf("{err}Missing values in time variable (%s).\n", tName)
		return (459)
	}
	tVar = (tVar :- min(tVar)) / tdelta
	T = max(tVar) + 1

	VarlistNames = tokens(st_local("varlist"))

	if (weights = strlen(wtype = st_local("weight"))) {
		stata("quietly generate double " + (wtvarname = st_tempname()) + (wexp = st_local("exp")) + " if " + touseName)
		st_view(wtvar, ., wtvarname, idSampleName)
	}

	st_local("0", "," + st_local("options"))
	stata("syntax, [Robust Cluster(varname) TWOstep noConstant noLeveleq ORthogonal ARtests(integer 2) SMall H(integer 3) DPDS2 Level(integer $S_level) ARLevels noDiffsargan SVmat *]")
	arlevels = st_local("arlevels")!= ""
	small = st_local("small")!= ""
	steps = 1 + (st_local("twostep")=="twostep")
	if ((Nclust = (st_local("cluster") != "")) & favorspeed() == 0) {
		printf("{err}cluster() not available in space-favoring mode.\n")
		return(198)
	}
	if ((svmat = st_local("svmat") != "") & favorspeed() == 0) {
		printf("{err}svmat not available in space-favoring mode.\n")
		return(198)
	}
	robust = Nclust | st_local("robust") != "" | (substr(wtype,1,1) == "p" & steps == 1)
	onestepnonrobust = steps == 1 & robust == 0
	diffsargan = st_local("diffsargan") == ""
	pfnXform = (orthogonal = st_local("orthogonal") != "") ? &_Orthog() : &_Difference()
	h = strtoreal(st_local("h"))
	artests = strtoreal(st_local("artests"))
	SystemGMM = st_local("leveleq") == ""

	if (SystemGMM == 0 & arlevels) {
		printf("{err}arlevels option invalid for difference GMM estimation.\n")
		return (198)
	}
	if (h != 1 & h != 2 & h != 3) {
		printf("{err}h(%f) invalid.\n", h)
		return (198)
	}

	// Compute row number for each observation once the data set is filled out to NT rows
	Fill = J(rows(tVar), 1, 1)
	for (r = 2; r <= rows(tVar); r++)
		Fill[r] = idVar[r]==idVar[r-1] ? Fill[r-1] : Fill[r-1] + T
	N = (NT = Fill[rows(Fill)] + T - 1) / T
	Fill = Fill + tVar
	SystemHeight = (SystemGMM + 1) * NT
	VarlistInds = st_tsrevar(VarlistNames)
	YInd = VarlistInds[1]
	st_view(YVar, ., YInd, idSampleName)
	(Y0 = touse = J(NT, 1, .))[Fill] = YVar
	touse[Fill] = touseVar

	if (weights)
		(wt0 = J(NT, 1, .))[Fill] = wtvar

	if (cols(VarlistInds) > 1) {
		XNames = VarlistNames[|2 \ .|]
		XInds = VarlistInds[|2 \ .|]
	} else {
		XNames = J(1, 0, "")
		XInds = J(1, 0, 0)
	}

	X0 = J(NT, cols(XInds), .)
	if (cols(XInds)) {
		st_view(XVars, ., XInds, idSampleName)
		X0[Fill, .] = XVars
	}

	if (Nclust) {
		stata("tempvar clust")
		stata("egen long " + st_local("clust") + " = group(" + st_local("cluster") + ") if " + idSampleName + ", missing")
		st_view(p, ., st_local("clust"), idSampleName)
		(clust = J(NT, 1, .))[Fill] = p
		Nclust = colmax(p)
	}

	if (consopt = (SystemGMM & st_local("constant") == "")) {
		XNames = "_cons", XNames
		X0 = J(NT, 1, 1), X0
	} else if (cols(XInds) == 0) {
		printf("{err}No regressors.\n")
		return (481)
	}
	RowsPerGroup = (1 + SystemGMM) * T

	SubscriptsStart = SystemHeight-RowsPerGroup+1,. \ SystemHeight,.
	SubscriptsStep = J(2, 1, RowsPerGroup), J(2, 1, 0)

	if (SystemGMM) { // reorder rows by t, equation rather than equation, t
		SortByIDEq = 0 :: SystemHeight-1
		SortByIDEq = trunc(SortByIDEq :/ RowsPerGroup) :* T + (mod(SortByIDEq, RowsPerGroup) :>= T) :* NT + mod(SortByIDEq, T) :+ 1
		SortByEqID = invorder(SortByIDEq)
		(Eq = J(SystemHeight, 1, 1))[|1 \ NT|] = J(NT, 1, 0) // dummy for levels equations
		_collate(Eq, SortByIDEq)
	}

	if (orthogonal) Complete = !(rowmissing(X0) :| rowmissing(Y0))

	if (j_IV = consopt) {
		IVInstParams = &(NULL, &0, &0, &2, &X0[,1], &"")  // add constant to list of IV instruments
		ivmz = ivpassthru = ivequation = 0
		IVInstNames = "_cons"
	} else {
		IVInstParams = NULL
		IVInstNames = J(1, 0, "")
	}

	rc = _ParseInsts(j_IV, j_GMM, NumIVOptions, NumGMMOptions, IVInstNames, GMMInstNames, IVInstParams, GMMInstParams, SystemGMM, Complete, N, T, NT, 
		idSampleName, Fill, orthogonal, ivmz, ivpassthru, ivequation, gmmcollapse, gmmpassthru, gmmequation, gmmlaglimits)
	if (rc) return (rc)

	NumDiffSargans = NumIVOptions + NumGMMOptions + SystemGMM

	_MakeIVInsts(InstOptInd, InstOptTxt, g, Z_IV, j_IV, SystemHeight, N, T, NT, NumIVOptions, IVInstParams, pfnXform, Complete, NumDiffSargans, SystemGMM)
	if (NumGMMOptions & diffsargan) {
		p_System = J(1, 0, 0)
		i = 0; ThisInstParams = GMMInstParams
		while (ThisInstParams != NULL) {
			FullInstSetEq = *(*ThisInstParams)[6]
			gmmstyle = *(*ThisInstParams)[9]
			NumInsts = *(*ThisInstParams)[10]
			p = 0..i , i+1+NumInsts..j_GMM+1
			InstOptInd[g, 2] = &(cols(p)>2 ? p[|2 \ cols(p) - 1|] : J(1, 0, .))
			InstOptTxt[g--] = gmmstyle

			if (SystemGMM & FullInstSetEq == 0) { // mark GMM instruments for difference equation, for inclusion in diff-Sargan test of levels instruments
				NumBaseVars = *(*ThisInstParams)[2]
				collapse = *(*ThisInstParams)[4]
				FullInstSetDiffed = *(*ThisInstParams)[5]
				MakeExtraInsts = *(*ThisInstParams)[7]
				p_System = p_System , i+1 .. i+NumInsts - (MakeExtraInsts? (collapse? 1 : T - FullInstSetDiffed) * NumBaseVars : 0)
			}
			i = i + NumInsts
			ThisInstParams = (*ThisInstParams)[1]
		}
		if (SystemGMM) {
			InstOptInd[1, 2] = &p_System
			InstOptTxt[1] = "GMM instruments for levels"
		}
	}

	if (SystemGMM) {
		if (weights) wt = wt0 \ wt0
		if (Nclust) clust = clust \ clust
		touse = (orthogonal? _lag(touse, 1) : touse) \ touse
		X = (*pfnXform)(X0, N, T, NT, Complete) \ X0
		Y = (*pfnXform)(Y0, N, T, NT, Complete) \ Y0
	} else {
		X = (*pfnXform)(X0, N, T, NT, Complete)
		Y = (*pfnXform)(Y0, N, T, NT, Complete)
		if (orthogonal) __lag(touse, 1)
		if (weights) wt = wt0
	}

	touse = touse :& !(rowmissing(Z_IV) :| rowmissing(X) :| rowmissing(Y))
	touseVar[,] = (SystemGMM? touse[|NT+1 \ .|] : touse)[Fill]

	if ((optionsArg=st_local("options")) != "") {
		printf("{err}%s invalid.", optionsArg)
		return (198)
	}

	if (favorspeed())
		Z_GMM = _MakeGMMInstSet(N, T, NT, SystemHeight, RowsPerGroup, j_GMM, touse, GMMInstParams)

	// Zero out excluded observations
	nottouse = J(Nnottouse = SystemHeight - sum(touse), 1, 0)
	r2 = Nnottouse
	for (r = SystemHeight; r; r--) if (touse[r] == 0) nottouse[r2--] = r
	Y = Y :* touse
	if (weights) wt = wt :* touse
	if (Nclust) clust = clust :* touse
	X[nottouse,] = J(Nnottouse, cols(X), 0)
	Z_IV[nottouse,] = J(Nnottouse, j_IV, 0)
	if (favorspeed())
		Z_GMM[nottouse,] = J(Nnottouse, j_GMM, 0)
	_editmissing(X, 0)
	_editmissing(Y, 0)
	_editmissing(Z_IV, 0)

	k = cols(keep = _rmcoll(X, 1, XNames))
	if (k == 0) {
		printf("{err}No regressors.\n")
		return (481)
	}

	if (k < cols(X)) {
		XNames = XNames[keep]
		X = X[, keep]
	}

	if ((j = j_GMM + j_IV) < k) {
		printf("{err}Equation not identified. Regessors outnumber instruments.\n")
		return (481)
	}

	NumObsEff = NumObs = sum(tmp = colshape(touse[|SystemHeight-NT+1 \ .|], T))
	NumGroups = sum(rowmax(tmp))
	if (weights) {
		wttot = sum(_wt = wt[|SystemHeight-NT+1 \ .|]) // holds weight sum first for "main" eq, then for eq used to compute sig2
		printf("(sum of weights is %f)\n", wttot)
		if (substr(wtype,1,1) == "f") {
			if (sum(_Difference(_wt, N, T, NT, Complete, 0))) {
				printf("{err}Frequency weights must be constant over time for {cmd:xtabond2}.\n")
				return (101)
			}
			NumObs = wttot
			if (onestepnonrobust)
				NumObsEff = wttot   // Effective sample size with fweights = sum of weights only if no clustering
				if (SystemGMM & h>1) wttot = sum(wt[|. \NT|])
			else {
				wt = wt * (NumObsEff / wttot)
				wttot = SystemGMM & h>1? sum(touse[|. \NT|]) : NumObsEff //sum of weights in Xformed eq
			}
		} else {
			wt = wt * (NumObs / wttot)
			wttot = SystemGMM & h>1? sum(touse[|. \NT|]) : NumObs //sum of weights in Xformed eq
		}
		X = X :* wt
		Y = Y :* wt
	} else
		wttot = SystemGMM & h>1? sum(touse[|. \NT|]) : NumObs

	if (NumObs == 0) {
		printf("{err}No observations.\n")
		return(2000)
	}
	TiMinMax = minmax(rowsum(tmp))

	if (SystemGMM) { // reorder rows by t, equation rather than equation, t
		_collate(X, SortByIDEq)
		_collate(Y, SortByIDEq)
		_collate(Z_IV, SortByIDEq)
		_collate(touse, SortByIDEq)
		if (weights) _collate(wt, SortByIDEq)
		if (Nclust) _collate(clust, SortByIDEq)
		if (favorspeed()) _collate(Z_GMM, SortByIDEq)
	}

	Zi = J(RowsPerGroup, j_GMM, 0)
	if (favorspeed()) {
		ZY = quadcross(Z_IV, Y) \ quadcross(Z_GMM, Y)
		ZX = quadcross(Z_IV, X) \ quadcross(Z_GMM, X)
		if (svmat) svmat(X, Y, (Z_IV, Z_GMM), touse, wt)
	} else {
		ZX = J(j_GMM, k, 0); ZY = J(j_GMM, 1, 0)
		if (j_GMM) {
			Subscripts = SubscriptsStart
			for (i = N; i; i--) {
				Zi[,] = _MakeGMMInstSet(N, T, NT, SystemHeight, RowsPerGroup, j_GMM, touse, GMMInstParams, i)
				ZY = ZY + quadcross(Zi, Y[|Subscripts|])
				ZX = ZX + quadcross(Zi, X[|Subscripts|])
				Subscripts = Subscripts - SubscriptsStep
			}
		}
		ZY = quadcross(Z_IV, Y) \ ZY
		ZX = quadcross(Z_IV, X) \ ZX
	}
	H = _H(h, orthogonal, orthogonal, SystemGMM, T)
	S = J(j, j, 0); Zi = J(RowsPerGroup, j, 0); Subscripts = SubscriptsStart

	for (i = N; i; i--) {
		_wt = weights? sqrt(wt[|Subscripts|]) : 1
		if (j_IV) Zi[|.,. \ .,j_IV|] = Z_IV[|Subscripts|]
		if (j_GMM) Zi[|.,j_IV+1 \ .,.|] = favorspeed()? Z_GMM[|Subscripts|] :
		   _MakeGMMInstSet(N, T, NT, SystemHeight, RowsPerGroup, j_GMM, touse, GMMInstParams, i) :* touse[|Subscripts|]
		S = S + quadcross(Zi, _wt, quadcross(H, _wt, Zi))
		Subscripts = Subscripts - SubscriptsStep
	}

	A1 = invsym(S)
	V1 = invsym((XZA=quadcross(ZX, A1)) * ZX)
	e1 = Y - X * (b1 = V1 * XZA * ZY)
	pwe = &(weights? e1:/sqrt(wt) : e1)
	if (SystemGMM) pwe = &(*pwe :* (Eq :== (h==1)))
	sig2 = quadcross(*pwe,*pwe) / (2 - (orthogonal | h==1)) / wttot
	A1 = A1 / sig2
	V1 = V1 * sig2
	j0 = j - diag0cnt(A1) // Adjust roughly for collinear instruments, based on number of collinear moments
	if (j0 > NumGroups)
		printf("{res}Warning: Number of instruments may be large relative to number of observations.\n")

	A1Ze = A1 * (Ze = _Ze(e1, ., ., N, T, NT, SystemHeight, RowsPerGroup, j_GMM, touse, GMMInstParams, 
					SubscriptsStart, SubscriptsStep, Z_IV, Z_GMM))
	sarganp = chi2tail(sarganDF = j0 - k, sargan = quadcross(Ze, A1Ze))

	if (onestepnonrobust) {
		// correct H by sig2 too and prepare to do Hansen as well as Sargan
		H = H * sig2
		S = S * sig2
	} else {	
		S = J(j, j, 0)
		if (Nclust) {
			pZe1i = J(Nclust, 1, NULL)
			for (i = Nclust; i; i--)
				if (any(p = clust :== i)) {
					e2 = select(e1, p)
					pZe1i[i] = &( (j_IV? quadcross(e2, select(Z_IV, p)) : J(1, 0, 0)) , (j_GMM? quadcross(e2, select(Z_GMM, p)) : J(1, 0, 0)) )
					S = S + quadcross(*pZe1i[i], *pZe1i[i])
				}
		} else {
			pZe1i = J(N, 1, NULL); Subscripts = SubscriptsStart
			for (i = N; i; i--) {
				e2 = e1[|Subscripts|]
				pZe1i[i] = &( (j_IV? quadcross(e2, Z_IV[|Subscripts|]) : J(1, 0, 0)) , (j_GMM? quadcross(e2, 
							favorspeed()? Z_GMM[|Subscripts|] : _MakeGMMInstSet(N, T, NT, SystemHeight, RowsPerGroup, j_GMM, touse, GMMInstParams, i)) : J(1, 0, 0)) )
				S = S + quadcross(*pZe1i[i], *pZe1i[i])
/*				pZe1i[i] = &( (j_IV? (Z_IV[|Subscripts|]) : J(RowsPerGroup, 0, 0)) , (j_GMM? ( 
							favorspeed()? Z_GMM[|Subscripts|] : _MakeGMMInstSet(N, T, NT, SystemHeight, RowsPerGroup, j_GMM, touse, GMMInstParams, i)) : J(RowsPerGroup, 0, 0)) )
				Hack to impose homoskedasticity with cross-correlation of errors
				e2 = e2 * e2'
				for (DFm=RowsPerGroup; DFm; DFm--) e2[DFm,DFm] = sig2*(touse[|Subscripts|])[DFm]
				S = S + quadcross(*pZe1i[i], e2) * *pZe1i[i] */
				Subscripts = Subscripts - SubscriptsStep
			}
		}		
		if (robust) { 
			VXZA = V1 * quadcross(ZX, A1)
			V1robust = VXZA * S * VXZA'
		}
 		A2 = invsym(S)

		if (diag0cnt(A2)) {
			printf("{res}Warning: Two-step estimated covariance matrix of moments is singular.\n")
			printf("{res}  Using a generalized inverse to calculate %s.\n", steps==2? "optimal weighting matrix for two-step estimation" : 
				"robust weighting matrix for Hansen test")
			if (diffsargan) printf("{res}  Difference-in-Sargan/Hansen statistics may be negative.\n")
		}

		// even in one-step robust, get Hansen
		V2 = invsym((XZA = quadcross(ZX, A2)) * ZX) 
		e2 = Y - X * (b2 = (VXZA = V2 * XZA) * ZY)
		if (steps == 2) {
			pwe = &(weights? e2:/sqrt(wt) : e2)
			if (SystemGMM) pwe = &(*pwe :* (Eq :== (h==1)))
			sig2 = quadcross(*pwe,*pwe) / (2 - (orthogonal | h==1)) / wttot
		}
		A2Ze = A2 * (Ze = _Ze(e2, . , ., N, T, NT, SystemHeight, RowsPerGroup, j_GMM, touse, GMMInstParams, 
						SubscriptsStart, SubscriptsStep, Z_IV, Z_GMM))

		if (steps == 2 & robust) {
		// Windmeijer-corrected variance matrix for two-step
		//   Need to compute matrix whose jth column is 
		//	[sum_i Z_i'(xj_i*e1_i'+e1_i*xj_i')Z_i]*A2*Z'e2 (where xj = jth col of X)
		//   = sum_i (Z_i'xj_i*e1_i'Z_i*A2*Z'e2 + Z_i'e1_i*xj_i'Z_i*A2*Z'e2).
		//   Since e1_i'Z_i*A2*Z'e2 and xj_i'Z_i*A2*Z'e2 are scalars, they can be transposed and swapped with 
		//   adjacent terms. So this is:
		//   matrix whose jth col is sum_i (e1_i'Z_i*A2*Z'e2 + Z_i'e1_i*e2'Z*A2)Z_i'xj_i
		//   = sum_i (e1_i'Z_i*A2*Z'e2 + Z_i'e1_i*e2'Z*A2)Z_i'X_i
		//   (transformation reverse engineered from DPD.)
			D = J(j, k, 0)
			if (Nclust) {
				for (i = Nclust; i; i--)
					if (any(p = clust :== i)) {
						Xi = select(X, p)
						ZXi = (j_IV? quadcross(select(Z_IV, p), Xi) : J(0, k, 0)) \ (j_GMM? quadcross(select(Z_GMM, p), Xi) : J(0, k, 0))
						D = D + (*pZe1i[i] * A2Ze) * ZXi + quadcross(A2Ze * *pZe1i[i], ZXi)
					}
			} else {
				Subscripts = SubscriptsStart
				for (i = N; i; i--) {
					Xi = X[|Subscripts|]
					ZXi = (j_IV? quadcross(Z_IV[|Subscripts|], Xi) : J(0, k, 0)) \
					   (j_GMM? quadcross(favorspeed()? Z_GMM[|Subscripts|] :
								_MakeGMMInstSet(N, T, NT, SystemHeight, RowsPerGroup, j_GMM, touse, GMMInstParams, i), Xi) : J(0, k, 0))
					D = D + (*pZe1i[i] * A2Ze) * ZXi + quadcross(A2Ze * *pZe1i[i], ZXi)
					Subscripts = Subscripts - SubscriptsStep
				}
			}
			D = VXZA * D
			V2robust = V2 + D * V1robust * D' + (D + D) * V2
		}
		hansenp = chi2tail(sarganDF, hansen = quadcross(Ze, A2Ze))
	}

	if (steps == 1) {
		b = b1
		pV = robust? &V1robust : &V1
	} else {
		b = b2
		pV = robust? &V2robust : &V2
	}
	V = (*pV + *pV')/2

	if (steps == 1) {
		pV = &V1
		pVrobust = &V1robust
		pA = &A1
		pe = &e1
	} else {
		pV = &V2
		pVrobust = &V2robust
		pA = &A2
		pe = &e2
	}

	m2VZXA = (-2 * *pV) * quadcross(ZX, *pA)
	if (robust)
		pV = pVrobust

	if (onestepnonrobust == 0) { // preserve estimation residuals for AR tests
		pei = J(N, 1, NULL)
		Subscripts = SubscriptsStart
		for (i = N; i; i--) {
			pei[i] = &((*pe)[|Subscripts|])
			Subscripts = Subscripts - SubscriptsStep
		}
	}

	if (Nclust == 0) Nclust = NumGroups 
	if (small) {
		tmp = wttot/(wttot - k)
		V = V * (onestepnonrobust? tmp : NumObs/(NumObs-k+1)*Nclust/(Nclust-1))
		sig2 = sig2 * tmp
	}

	// In case constant is in column space of X, even despite noconstant, bump it out for F/Wald test
	DFm_keep = _rmcoll(SystemGMM? 		// for system GMM, drop difference equation from model fit test
					X[SortByEqID[|NT+1 \ .|], .] : 
					X)
	if (cols(DFm_keep) > consopt) {
		DFm = cols(DFm_keep = DFm_keep[|1+consopt\.|]) 
		b2 = b[DFm_keep]
		if (DFm)
			if (small)
				Fp = Ftail(DFm, DFr = (onestepnonrobust? NumObsEff - DFm : Nclust) - consopt,
						    F   = quadcross(b2, invsym(V[DFm_keep', DFm_keep])) * b2 / DFm)
			else
				Waldp = chi2tail(DFm, Wald = quadcross(b2, invsym(V[DFm_keep', DFm_keep])) * b2)
	} else {
		DFm = 0
		if (small) 
			DFr = (onestepnonrobust? NumObsEff : Nclust) - consopt
	}

	if (diffsargan) {
		diffsargans = J(5, NumDiffSargans, .)
		A1diag = diagonal(A1)
		p_AllIV  = j_IV ? 1..j_IV  : J(1, 0, 0)
		p_AllGMM = j_GMM? 1..j_GMM : J(1, 0, 0)
		for (g = NumDiffSargans; g; g--) {
			p_IV = *InstOptInd[g, 1]
			p_GMM = *InstOptInd[g, 2]
			p = (p_IV  == . ? p_AllIV  : p_IV ) , 
			    (p_GMM == . ? p_AllGMM : p_GMM) :+ j_IV
			NumOtherInsts = sum(A1diag[p] :!= 0)
			if (NumOtherInsts >= k & NumOtherInsts < j0) { // invalid if # remaining insts < # of regressors
				XZA = quadcross(ZXp = ZX[p,], App = invsym(S[p,p]))
				Ze = _Ze(Y - X * (invsym(XZA * ZXp) * (XZA * ZY[p])), p_IV, p_GMM, N, T, NT, SystemHeight, RowsPerGroup, 
						j_GMM, touse, GMMInstParams, SubscriptsStart, SubscriptsStep, Z_IV, Z_GMM)
				diffsargans[2, g] = (hansen==.? sargan : hansen) - (diffsargans[1, g] = quadcross(Ze, App * Ze)) // unrestricted Sargan, and difference
				diffsargans[3, g] = j0 - NumOtherInsts                                        // # insts in group
				diffsargans[4, g] = chi2tail(sarganDF - diffsargans[3, g], diffsargans[1, g]) // p value
				diffsargans[5, g] = chi2tail(diffsargans[3, g], diffsargans[2, g])            // p value
			}
		}
		NumDiffSargans = rows(InstOptTxt = select(InstOptTxt, diffsargans[1,]' :!= .))
		diffsargans = select(diffsargans, diffsargans[1,] :!= .)
	}

	// AR test. e=full residual vector. w = (weighted) diff or levels residuals tested for AR(). wl = unweighted lagged residuals
	if (onestepnonrobust) {
		H = _H(arlevels? 1 : h, 0, 0, 0, T) * sig2
		psit = _H(h, 0, orthogonal, 0, T) * sig2 //psi'
		if (SystemGMM) psit = arlevels? J(T, T, 0), psit : psit, J(T, T, 0)
	}
	touse2 = colshape(SystemGMM? touse[p = SortByEqID[|arlevels? NT+1 \ SystemHeight : . \ NT|]] : touse, T)'
	if (orthogonal & arlevels == 0) { // Get residuals in first differences for AR() test
		wl = _Difference(Y0 - (X = X0[, keep]) * b, N, T, NT, Complete)
		X = _Difference(X, N, T, NT, Complete)
		if (weights) {
			X = X :* wt0
			w = colshape(wl :* wt0, T)'
			wl = colshape(wl, T)'
		} else
			wl = w = colshape(wl, T)'
	} else {
		if (SystemGMM) {
			w = (*pe)[p] // get residuals subject to AR() test 
			X = X[p,]
		} else
			w = *pe
		if (weights) {
			wl = colshape(w :/ wt0, T)'
			w = colshape(w, T)'
		} else
			wl = w = colshape(w, T)'
	}
	ARz = ARp = J(artests, 1, .)
	for (lag=1; lag<=artests; lag++) {
		__lag(wl)
		if (any(sum_wwli = colsum(w :* wl))) {
			ZHw = J(j, 1, 0); Subscripts = SubscriptsStart
			if (onestepnonrobust) {
				wHw = 0
				for (i = N; i; i--) {
					_wt = weights? wt[|Subscripts|][|1\T|] : 1
					wli = wl[,i] :* touse2[,i]
					wHw = wHw + quadcross(wli, quadcross(H, _wt, wli))
					psiw = quadcross(psit, _wt, wli) // DPD uses ortho dev residuals here
					ZHw = ZHw + ((j_IV? quadcross(Z_IV[|Subscripts|], psiw) : J(0, 1, 0)) \ (j_GMM? quadcross(favorspeed()? Z_GMM[|Subscripts|] :
							_MakeGMMInstSet(N, T, NT, SystemHeight, RowsPerGroup, j_GMM, touse, GMMInstParams, i) :* touse[|Subscripts|], psiw) : J(0, 1, 0)))
					Subscripts = Subscripts - SubscriptsStep
				}
			} else {
				wHw = sum(sum_wwli :* sum_wwli)
				for (i = N; i; i--) {
					ZHw = ZHw + ((j_IV? quadcross(Z_IV[|Subscripts|], *pei[i]) : J(0, 1, 0)) \ (j_GMM? quadcross(favorspeed()? Z_GMM[|Subscripts|] :
							_MakeGMMInstSet(N, T, NT, SystemHeight, RowsPerGroup, j_GMM, touse, GMMInstParams, i), *pei[i]) : J(0, 1, 0))) * sum_wwli[i]
					Subscripts = Subscripts - SubscriptsStep
				}
			}
			Xwl = quadcross(X, vec(wl))
			ARp[lag] = 2 * normal(-abs(ARz[lag] = sum(sum_wwli) / sqrt(wHw + quadcross(Xwl, (m2VZXA * ZHw + *pV * Xwl)))))
		}
	}

	if (consopt & rows(b) > 1) { // move constant term to the end
		p = 2..k, 1
		b = b[p]
		V = V[p, p]
		XNames = XNames[p]
	}

	st_local("b", bname=st_tempname())
	st_matrix(bname, b')
	Stripe = J(k, 1, ""), XNames'
	st_matrixcolstripe(bname, Stripe)
	st_local("V", Vname=st_tempname())
	st_matrix(Vname, V)
	st_matrixcolstripe(Vname, Stripe)
	st_matrixrowstripe(Vname, Stripe)
	stata("est post " + bname + " " + Vname + "," + (small? "dof(" + strofreal(DFr)+")" : "") + " obs(" + strofreal(NumObs) + 
				") esample(" + touseName +") depname(" + VarlistNames[1] + ")")

	st_numscalar("e(sargan)", sargan)
	st_numscalar("e(sar_df)", sarganDF)
	st_numscalar("e(sarganp)", sarganp)

	if (hansen != .) {
		st_numscalar("e(hansen)", hansen)
		st_numscalar("e(hansen_df)", sarganDF)
		st_numscalar("e(hansenp)", hansenp)
	}

	if (cols(diffsargans)) {
		st_matrix("e(diffsargan)", diffsargans)
		st_matrixcolstripe("e(diffsargan)", (J(NumDiffSargans, 1, ""), substr(subinstr(InstOptTxt, ".", ""), 1, 32)))
		st_matrixrowstripe("e(diffsargan)", (J(5, 1, ""), 
		  ("Unrestricted Sargan/Hansen"\"Difference in Sargan"\"Instruments generated"\"Unrestricted Sargan p"\"Difference in Sargan p")))
	}

	bname = ""
	for (i=1; i<=rows(InstOptTxt); i++) st_global("e(diffgroup"+strofreal(i)+")", InstOptTxt[i])

	st_matrix("e(A1)", A1)
	if (A2 != .) st_matrix("e(A2)", A2)
	st_matrix("e(ivequation)", ivequation)
	st_matrix("e(ivpassthru)", ivpassthru)
	st_matrix("e(ivmz)", ivmz)
	st_matrix("e(gmmequation)", gmmequation)
	st_matrix("e(gmmpassthru)", gmmpassthru)
	st_matrix("e(gmmcollapse)", gmmcollapse)
	st_matrix("e(gmmlaglimits)", gmmlaglimits)
	st_global("e(depvar)", VarlistNames[1])
	st_numscalar("e(N)", NumObs)
	st_numscalar("e(sig2)", sig2)
	st_numscalar("e(artests)", artests)
	st_global("e(transform)", orthogonal? "orthogonal deviations" : "first differences")
	st_numscalar("e(g_min)", TiMinMax[1])
	st_numscalar("e(g_max)", TiMinMax[2])
	st_numscalar("e(N_g)", NumGroups)
	st_numscalar("e(df_m)", DFm)
	st_numscalar("e(h)", h)
	if (Nclust) {
		st_global("e(clustvar)", st_local("cluster"))
		st_numscalar("e(N_clust)", Nclust)
	}
	if (strlen(wexp)) {
		st_global("e(wtype)", wtype)
		st_global("e(wexp)", wexp)
	}
	if (small) {
		st_numscalar("e(F)", F)
		st_numscalar("e(F_p)", Fp)
		st_numscalar("e(df_r)", DFr)
		st_global("e(small)", "small")
	} else {
		st_numscalar("e(chi2)", Wald)
		st_numscalar("e(chi2p)", Waldp)
	}

	st_numscalar("e(g_avg)", NumObs / NumGroups)

	for (i=1; i<=artests; i++) {
		st_numscalar("e(ar" + strofreal(i) + ")", ARz[i])
		st_numscalar("e(ar" + strofreal(i) + "p)", ARp[i])
	}

	if (steps==2)
		st_global("e(twostep)", "twostep")
	if (robust) {
		st_global("e(robust)", "robust")
		st_global("e(vcetype)", steps==1? "Robust" : "Corrected")
	}
	for (i=cols(GMMInstNames); i; i--) st_global("e(gmminsts"+strofreal(i)+")", GMMInstNames[i])
	for (i=cols(IVInstNames) ; i; i--) st_global("e(ivinsts"+strofreal(i)+")", IVInstNames[i])

	st_global("e(ivar)", idName)
	st_global("e(tvar)", tName)
	st_numscalar("e(j)", j0)
	st_numscalar("e(j0)", j)
	st_global("e(esttype)", SystemGMM? "system" : "difference")
	st_global("e(artype)", arlevels? "levels" : "first differences")
	st_local("level", LevelArg)
	st_global("e(predict)", "xtab2_p")
	st_global("e(cmd)", "xtabond2")

	return(0) 
}

real scalar _ParseInsts(real scalar j_IV, real scalar j_GMM, real scalar NumIVOptions, real scalar NumGMMOptions, string rowvector IVInstNames, 
	string rowvector GMMInstNames, pointer(pointer rowvector) IVInstParams, pointer(pointer rowvector) GMMInstParams, real scalar SystemGMM, 
	real colvector Complete, real scalar N, real scalar T, real scalar NT, string scalar idSampleName, real colvector Fill, real scalar orthogonal,
	real rowvector ivmz, real rowvector ivpassthru, real rowvector ivequation, real rowvector gmmcollapse, 
	real rowvector gmmpassthru, real rowvector gmmequation, real matrix gmmlaglimits) {
	
	string scalar ivstyle, gmmstyle, optionsArg, BaseVarNames, LaglimArg, EquationArg
	string rowvector LaglimStr
	real rowvector BaseInds, Laglim
	real scalar equation, passthru, mz, collapse, split, FullInstSetDiffed, FullInstSetEq, MakeExtraInsts, NumInsts, steps, EquationTokenCount
	real matrix BaseVars, Base
	pointer(real matrix) scalar pBase, pBaseAll
	pragma unset BaseVars
	
	st_local("0", "," + st_local("options"))
	stata("syntax [, IVstyle(string) *]")
	NumIVOptions = j_IV
	while ((ivstyle = st_local("ivstyle")) != "") {
		NumIVOptions++
		optionsArg = st_local("options")
		st_local("0", ivstyle)
		stata("capture syntax varlist(numeric ts), [Equation(string) Passthru MZ]")
		stata("loc _rc = _rc")
		if (st_local("_rc") != "0") {
			printf("{err}ivstyle(%s) invalid.\n", ivstyle)
			return (198)
		}

		BaseInds = st_tsrevar(tokens(BaseVarNames = st_local("varlist")))

		st_local ("0", "," + (EquationArg = st_local("equation")))
		EquationTokenCount = cols(tokens(EquationArg))
		stata("capture syntax, [Diff Level Both]")
		stata("loc _rc = _rc")
		if (EquationTokenCount > 1 | st_local("_rc") != "0") {
			printf("{err}equation(%s) invalid.\n", EquationArg)
			return (198)
		}
		// 0=eq(level), 1=eq(diff), 2=eq(both) (default)
		equation = EquationTokenCount==0 | st_local("both")!="" ? 2 : st_local("level")==""
		ivequation = ivequation, equation

		if ((SystemGMM | equation)==0)
			printf ("{txt}Instruments for levels equations only ignored since noleveleq specified.\n")
		else {
			ivpassthru = ivpassthru, (passthru = st_local("passthru") != "")
			ivmz = ivmz, (mz = st_local("mz") != "")
	
			if (passthru & equation == 2 & SystemGMM) {
				printf ("{err}passthru not valid with equation(both) in system GMM.\n")
				return (198)
			}

			ivstyle = "iv(" + BaseVarNames
			if (mz | passthru | equation != 2) {
				ivstyle = ivstyle + ","
				if (passthru) ivstyle = ivstyle + " passthru"
				if (mz) ivstyle = ivstyle + " mz"
				if (equation !=2) ivstyle = ivstyle + " eq(" + (equation? "diff" : "level") + ")"
			}
			ivstyle = ivstyle + ")"

			st_view(BaseVars, . , BaseInds, idSampleName)
			(Base = J(NT, cols(BaseInds), .))[Fill, .] = BaseVars
			// Insert vector of paramaters and data describing IV inst set at head of linked list of such vectors
			IVInstParams = &(IVInstParams, &(mz+0), &(passthru+0), &(equation+0), &(Base:+0), &(ivstyle+""))
			if (orthogonal)
				Complete = Complete :& !rowmissing(Base)
			IVInstNames = IVInstNames, BaseVarNames
			j_IV = j_IV + cols(BaseInds)
		}
		st_local("0", "," + optionsArg)
		stata("syntax [, IVstyle(string) *]")
	}

	st_local("0", "," + st_local("options"))
	stata("syntax [, GMMstyle(string) *]")
	GMMInstNames = J(1, 0, ""); GMMInstParams = NULL; j_GMM = 0
	NumGMMOptions = 0; gmmlaglimits = J(2, 0, 0)
	while ((gmmstyle = st_local("gmmstyle")) != "") {
		optionsArg = st_local("options")
		st_local ("0", gmmstyle)
		stata("capture syntax anything, [SPlit *]") // strip out this suboption first so it won't appear in diff-sargan reports
		stata("loc _rc = _rc")
		if (st_local("_rc") != "0") {
			printf("{err}gmmstyle(%s) invalid.\n", gmmstyle)
			return (198)
		}
		split = strlen(st_local("split")) > 0

		if (strlen(gmmstyle = st_local("options")))
			gmmstyle = st_local("anything") + ", " + gmmstyle
		else
			gmmstyle = st_local("anything")

		st_local ("0", gmmstyle)
		stata("capture syntax varlist(numeric ts), [Equation(string) Laglimits(string) Collapse Passthru]")
		stata("loc _rc = _rc")
		if (st_local("_rc") != "0") {
			printf("{err}gmmstyle(%s) invalid.\n", gmmstyle)
			return (198)
		}
		passthru = st_local("passthru") != ""
		collapse = st_local("collapse") != ""

		BaseInds = st_tsrevar(tokens(BaseVarNames = st_local("varlist")))

		st_local ("0", "," + (EquationArg = st_local("equation")))
		EquationTokenCount = cols(tokens(EquationArg))
		stata("capture syntax, [Diff Level Both]")
		stata("loc _rc = _rc")
		if (EquationTokenCount > 1 | st_local("_rc") != "0") {
			printf("{err}equation(%s) invalid.\n", EquationArg)
			return (198)
		}
		// 0=eq(level), 1=eq(diff), 2=eq(both) (default)
		equation = EquationTokenCount==0 | st_local("both")!="" ? 2 : st_local("level")==""
	
		if (split & equation != 2) {
			printf("{err}equation(%s) split invalid.\n", EquationArg)
			return (198)
		}

		if ((SystemGMM | equation)==0)
			printf ("{txt}Instruments for levels equations only ignored since noleveleq specified.\n")
		else {
			if (SystemGMM & passthru & equation==2) {
				printf("{err}passthru not valid with equation(both) in system GMM.\n")
				return (198)
			}

			if (cols(LaglimStr = tokens(LaglimArg = st_local("laglimits")))) {
				if (cols(LaglimStr) != 2) {
					printf("{err}Laglimits(%s) must have two arguments.\n", LaglimArg)
					return (198)
				}
				if (missing(Laglim = strtoreal(LaglimStr)) > sum(LaglimStr :== ".")) {
					printf("{err}Laglimits(%s) invalid.\n", LaglimArg)
					return (198)
				}
				if (Laglim[1] == .)
					Laglim[1] = 1
				if (Laglim[1] > Laglim[2])
					Laglim = Laglim[(2,1)]
			} else
				Laglim = 1, .

			// Get base vars filled out to NT rows
			st_view(BaseVars, . , BaseInds, idSampleName)
			(Base = J(NT, cols(BaseInds), .))[Fill, .] = BaseVars
			pBase = collapse? &Base : &_Explode(Base, N, T, NT)
			
			gmmstyle = "gmm(" + BaseVarNames + ","

			if (collapse) gmmstyle = gmmstyle + " collapse"
			if (passthru) gmmstyle = gmmstyle + " passthru"
			if (split)
				equation = 1
			else {
				if (equation != 2) gmmstyle = gmmstyle + " eq(" + (equation? "diff" : "level") + ")"
				gmmstyle = gmmstyle + " lag(" + strofreal(Laglim[1]) + " " + strofreal(Laglim[2]) + ")" 
			}

			MakeExtraInsts = equation==2 & SystemGMM   // Need to make extra instruments, as in standard Blundell-Bond?
			pBaseAll = &(SystemGMM | !(equation | passthru)? 
						&editmissing(*pBase, 0) \ (collapse? 
								&_Difference(Base, N, T, NT, Complete, 0) :
								&_Explode(_Difference(Base, N, T, NT, Complete, 0), N, T, NT)) :
						&editmissing(*pBase, 0))

			for (steps=split+1; steps; steps--) { // run twice for split groups
				NumGMMOptions++
				gmmpassthru = gmmpassthru, passthru
				gmmcollapse = gmmcollapse, collapse
				gmmlaglimits = gmmlaglimits, Laglim'
				gmmequation = gmmequation, equation
				FullInstSetEq = !equation  			 // Should the full instrument set apply to levels equations?
				FullInstSetDiffed = !(equation | passthru) // Should the "full" instrument set be differenced?
				j_GMM = j_GMM + (NumInsts = cols(BaseInds) * _GMMInstsPerBaseVar(Laglim, collapse, FullInstSetDiffed, FullInstSetEq, MakeExtraInsts, T))

				// Insert vector of paramaters and data describing GMM inst set at head of linked list of such vectors
				GMMInstParams = &(GMMInstParams, &cols(BaseInds), &(Laglim:+0), &(collapse+0), &(FullInstSetDiffed+0), 
					&(FullInstSetEq+0), &(MakeExtraInsts+0), pBaseAll, 
					&(gmmstyle + (split? (equation? " eq(diff)" : " eq(level)") + " lag(" + strofreal(Laglim[1]) + " " + strofreal(Laglim[2]) + ")" : "") + ")"),
					&(NumInsts + 0))
				GMMInstNames = GMMInstNames, BaseVarNames

				if (split & steps) { // run at end of 1st iteration of split group
					equation--
					Laglim = J(1, 2, Laglim[1] - 1)  // levels inst for gmm(X, lag(a b)) specified by gmm(X, lag(a-1 a-1) eq(lev)
				}
			}
		}
		st_local("0", "," + optionsArg)
		stata("syntax [, GMMstyle(string) *]")
	}
	return (0)
}

void _MakeIVInsts(pointer(real rowvector) matrix InstOptInd, string rowvector InstOptTxt, real scalar g, real matrix Z_IV, real scalar j_IV, 
		real scalar SystemHeight, real scalar N, real scalar T, real scalar NT, real scalar NumIVOptions, pointer(pointer rowvector) IVInstParams, 
		pointer (real matrix function) pfnXform, real colvector Complete, real scalar NumDiffSargans, real scalar SystemGMM) {
	real scalar mz, passthru, equation, i
	real matrix Base
	string scalar ivstyle
	real colvector p

	InstOptInd = J(NumDiffSargans, 2, &.) // holds col indices for complements of each instument group--IV and GMM separate
	InstOptTxt = J(NumDiffSargans, 1, " ")
	g = NumDiffSargans
	Z_IV = J(SystemHeight, j_IV, 0)
	if (NumIVOptions) {
		i = 0
		while (IVInstParams != NULL) {
			mz = *(*IVInstParams)[2]
			passthru = *(*IVInstParams)[3]
			equation = *(*IVInstParams)[4] // 0=levels only, 1=differences only, 2=both
			Base = *(*IVInstParams)[5]     // instrumenting variables
			ivstyle = *(*IVInstParams)[6]

			if (equation)
				Z_IV[|1,i+1 \ NT, i+cols(Base)|] = mz? editmissing((passthru? Base : (*pfnXform)(Base, N, T, NT, Complete)),0) : 
												     (passthru? Base : (*pfnXform)(Base, N, T, NT, Complete))
			if (SystemGMM & equation != 1)
				Z_IV[|NT+1,i+1 \ SystemHeight, i+cols(Base)|] = mz? editmissing(Base,0) : Base
	
			if (strlen(ivstyle)) { // leave constant term out of diff-sargan testing
				p = 0..i , i+1+cols(Base)..j_IV+1
				InstOptInd[g, 1] = &(cols(p)>2 ? p[|2 \ cols(p) - 1|] : J(1, 0, .))
				InstOptTxt[g--] = ivstyle
			}
			i = i + cols(Base)
			IVInstParams = (*IVInstParams)[1]
		}
	}
}

// Compute Z'e given e
real colvector _Ze(real colvector e, real rowvector p_IV, real rowvector p_GMM, real scalar N, real scalar T, real scalar NT, 
				real scalar SystemHeight, real scalar RowsPerGroup, real scalar j_GMM, real colvector touse, 
				pointer(pointer rowvector) GMMInstParams, real matrix SubscriptsStart, real matrix SubscriptsStep, real matrix Z_IV, real matrix Z_GMM) {
	real matrix ZGMMe, Subscripts
	real scalar id, j
	if (favorspeed())
		ZGMMe = quadcross(Z_GMM[, p_GMM], e)
	else if (cols(p_GMM) == 0)
		ZGMMe = J(0, 1, 0)
	else {
		j = p_GMM == . ? j_GMM : cols(p_GMM)
		ZGMMe = J(j, 1, 0)
		Subscripts = SubscriptsStart
		for (id = N; id; id--) {
			ZGMMe = ZGMMe + quadcross(_MakeGMMInstSet(N, T, NT, SystemHeight, RowsPerGroup, j_GMM, touse, GMMInstParams, id)[, p_GMM], e[|Subscripts|])
			Subscripts = Subscripts - SubscriptsStep
		}
	}
	return (quadcross(Z_IV[, p_IV], e) \ ZGMMe)
}

// Return matrix that effects transformation, transposed
real matrix _xform(real scalar xform, real scalar T) { //xform = 0 is diff, 1 is orthog dev
	real matrix M
	real scalar r
	M = I(T)
	if (xform) {
		__lag(M, -1)  // lag it to parallel first difference transform
		for (r=T-1; r; r--) M[|r+1, r+1 \ ., r+1|] = J(T-r, 1, -1/(T-r))
		return (M :/ editvalue(sqrt(colsum(M :^ 2)), 0, 1))
	}
	(M = M - _lag(M, -1))[1,1] = 0
	return (M)
}

// h is value of h() option; LeftXform and Rightxform=0 for diff, 1 for orthog
real matrix _H(real scalar h, real scalar LeftXform, real scalar RightXform, real scalar SystemGMM, real scalar T) {
	real matrix Ml, Mr, H
	Ml = h==1? I(T) : _xform(LeftXform, T)
	Mr = h==1? I(T) : _xform(RightXform, T)
	if (h == 3 | SystemGMM==0) {
		if (SystemGMM) {
			Ml = Ml , I(T)
			Mr = Mr , I(T)
		}
		H = quadcross(Ml, Mr)
	} else
		H = blockdiag(quadcross(Ml,Mr), I(T))
	_edittozero(H, 100)
	return (H)
}

real scalar _GMMInstsPerBaseVar(real rowvector Laglim,             // lag limits
					real scalar collapse,            // "collapse" flag
					real scalar FullInstSetDiffed,   // flag for whether main instrument set is actually differenced
					real scalar FullInstSetEq,       // Equations that get full instruments (0=diff, 1=levels)
					real scalar MakeExtraInsts,      // In system GMM, flag for whether to make additional instruments for levels
					real scalar T)
{
	real scalar InstsPerBaseVar, LagMax, ForwardMax, N1, N2

	LagMax = T - 1 - (FullInstSetEq & FullInstSetDiffed)  // the most one could lag in the "full" instrument set
	ForwardMax = T - 2 + FullInstSetEq 		            // the most one could "forward" therein
	Laglim = max((-ForwardMax, Laglim[1])) , min((LagMax, Laglim[2]))

	if (collapse)
		return (Laglim[2] - Laglim[1] + 1 + MakeExtraInsts)
	else {
		if (Laglim[1] > 0 | Laglim[2] <= 0) {
			if (Laglim[1] >= 0) {
				N1 = LagMax - Laglim[1] + 1
				N2 = LagMax - Laglim[2]
			} else {
				N1 = ForwardMax + Laglim[2] + 1
				N2 = ForwardMax + Laglim[1]
			}
			InstsPerBaseVar = (N1*(N1+1) - N2*(N2+1))/2
		} else {
			N1 = ForwardMax + Laglim[1]
			N2 = LagMax - Laglim[2]
			InstsPerBaseVar = (LagMax+1)*(ForwardMax+1) - (N1*(N1+1) + N2*(N2+1))/2
		}
		return (MakeExtraInsts? InstsPerBaseVar + T - FullInstSetDiffed : InstsPerBaseVar)
	}
}

real matrix _MakeGMMInstSet(real scalar N, real scalar T, real scalar NT, real scalar SystemHeight, real scalar RowsPerGroup, 
					real scalar j_GMM, real colvector touse, pointer(pointer rowvector) GMMInstParams,
					| real scalar id) // If id!=., restricts Z to just this individual. 
{
	real scalar c, Lag, LagStop, SearchDir, InstOffset, Zeros, FullInstSetDiffed, FullInstSetEq, 
			MakeExtraInsts, NumBaseVars, _N, _NT, _SystemHeight, collapse
	real matrix Z, SubscriptsBase, SubscriptsInst, SubscriptsStep, NewInsts
	real colvector p
	real rowvector Laglim
	pointer (pointer rowvector) ThisInstParams
	pointer(real matrix) colvector BaseAll

	if (id != .) {
		_N = 1
		_NT = T
		_SystemHeight = RowsPerGroup
	} else {
		_N = N
		_NT = NT
		_SystemHeight = SystemHeight
	}
	Z = J(_SystemHeight, j_GMM, 0)
	ThisInstParams = GMMInstParams
	SubscriptsInst = J(2, 2, 1)
	while (ThisInstParams != NULL) {
		NumBaseVars = *(*ThisInstParams)[2]
		Laglim = *(*ThisInstParams)[3]
		collapse = *(*ThisInstParams)[4]
		FullInstSetDiffed = *(*ThisInstParams)[5]
		FullInstSetEq = *(*ThisInstParams)[6]
		MakeExtraInsts = *(*ThisInstParams)[7]
		BaseAll = *(*ThisInstParams)[8]

		if (id != .)
			for (c=1; c<=rows(BaseAll); c++) // Restrict to this id
				BaseAll[c] = &((*BaseAll[c])[|(id - 1) * T + 1, . \ id * T, .|])

		// Make instruments by copying columns sets from the "Wide" or (if collapse) original matrices.
		SubscriptsStep = 1, (collapse? 0 : NumBaseVars)
		SubscriptsBase = Laglim[1]+FullInstSetEq > 0? 1+FullInstSetDiffed \ _NT-Laglim[1] : 2-FullInstSetEq-Laglim[1] \ _NT
		SubscriptsBase = SubscriptsBase, (collapse? 1 \ NumBaseVars : (SubscriptsBase[1]-1) * NumBaseVars + 1 \ (SubscriptsBase[2] + T - _NT) * NumBaseVars)
		InstOffset = FullInstSetEq * _NT + Laglim[1]
		for (Lag = Laglim[1]; Lag <= Laglim[2]; Lag++) {
			SubscriptsInst = SubscriptsBase[,1] :+ InstOffset++ , SubscriptsBase[,2] :+ SubscriptsInst[1,2]-SubscriptsBase[1,2]
			NewInsts = (*BaseAll[FullInstSetDiffed+1])[|SubscriptsBase|]
			if (Lag & _N > 1) {  // 0 out rows that shifted into adjacent group
				Zeros = abs(Lag) + (!FullInstSetEq & Lag<0)
				p = 0 :: Zeros * (_N - 1) - 1
				p = trunc(p :/ Zeros) :* T + mod(p, Zeros) :+ T - Zeros + 1
				NewInsts[p,] = J(rows(p), cols(NewInsts), 0)
			}
			Z[|SubscriptsInst|] = NewInsts
			SubscriptsInst[1,2] = SubscriptsInst[2,2] + 1
			// To move base frame, raise the top end toward t=1, or, if it's flush against t=1, raise its bottom end
			if (Lag>=0) SubscriptsBase[2,] = SubscriptsBase[2,] - SubscriptsStep
			if (Lag<0 | Lag==0 & !FullInstSetDiffed) SubscriptsBase[1,] = SubscriptsBase[1,] - SubscriptsStep
		}
	
		if (MakeExtraInsts) {		
			if (Laglim[1] > 0) { // if both lags positive, start at lag Laglim[1]-1 (as is standard) and search to deeper lags if neceesary
				Lag = Laglim[1] - 1
				LagStop = Laglim[2] - 1
				SearchDir = 1
			} else {
				if (Laglim[2] > 0) { // if lags straddle 0, start at lag 0 and search to deeper lags (which could miss workable negative lags, but we only need one)
					Lag = 0
					LagStop = Laglim[2] - 1
					SearchDir = 1
				} else { // if both lags non-positive, start at lag Laglim[2]-1 and search to deeper forwards if neceesary
					Lag = Laglim[2] - 1
					SearchDir = -1
					LagStop = Laglim[1] - 1
				}
			}
			SubscriptsBase[,1] = Lag >= 0? 1 \ _NT-Lag : 1-Lag \ _NT
			SubscriptsBase[,2] = collapse? 1 \ NumBaseVars : (SubscriptsBase[1,1]-1) * NumBaseVars + 1 \ (SubscriptsBase[2,1] + T - _NT) * NumBaseVars
			SubscriptsInst = SubscriptsBase[,1] :+ Lag+_NT , SubscriptsBase[,2] :+ SubscriptsInst[1,2]-SubscriptsBase[1,2]
			NewInsts = (*BaseAll[2])[|SubscriptsBase|]
			if (Lag & _N > 1) {  // 0 out rows that shifted into adjacent group
				Zeros = abs(Lag)
				p = 0 :: Zeros * (_N - 1) - 1
				p = trunc(p :/ Zeros) :* T + mod(p, Zeros) :+ T - Zeros + 1
				NewInsts[p, .] = J(rows(p), cols(NewInsts), 0)
			}
			Z[|SubscriptsInst|] = NewInsts
			SubscriptsInst[1,2] = SubscriptsInst[2,2] + 1

			// If any of these instruments happens to be 0 for all included observations, try shifting it to other t's in the instrument matrix
			if (SearchDir == 1) 
				for (c=SubscriptsInst[1,2]; c<=SubscriptsInst[2,2]; c++) { 
					for (; Lag<LagStop & !any(Z[,c] :& touse); Lag++)   
						Z[|2,c \ .,c|] = Z[|1,c \ _SystemHeight-1, c|]
						Z[_NT+1, c] = 0
					}
			else
				for (c=SubscriptsInst[1,2]; c<=SubscriptsInst[2,2]; c++)
					for (; Lag>LagStop & !any(Z[,c] :& touse); Lag--) {
						Z[|1,c \ _SystemHeight-1, c|] = Z[|2,c \ .,c|]
						Z[_SystemHeight, c] = 0
					}
		}
		ThisInstParams = (*ThisInstParams)[1]
	}
	return (Z)
}

real rowvector _rmcoll(real matrix X, | real scalar nocons, string rowvector varnames) {
	real rowvector keep; pointer (real matrix) pX; real matrix L, U; real colvector p; real scalar i, jkeep
	pragma unset L; pragma unset U; pragma unset p
	if (rows(X)) {
		pX = nocons==1? &X : &(J(rows(X), 1, 1), X)
		lud(quadcross(*pX, *pX), L, U, p)
		_edittozero(U = diagonal(U), 10000)
		keep = J(1, cols(X) - sum(!U), 0)
		jkeep = 1
		if (nocons != 1)
			U = U[|2 \ .|]
		for (i = 1; i <= rows(U); i++)
			if (U[i])
				keep[jkeep++] = i
			else if (cols(varnames))
				printf("{txt}%s dropped due to collinearity\n", varnames[i])
		return (keep)
	}
	return (.)
}

real matrix _Difference(real matrix X, real scalar N, real scalar T, real scalar NT, real colvector Complete, | real scalar MissingFillValue) {
	pragma unused Complete
	real matrix X2
	X2 = J(NT, cols(X), MissingFillValue)
	X2[|2,. \ .,.|] = X[|2,. \ .,.|] - X[|1,. \ NT-1,.|]
	if (N > 1) X2[(1::N-1) :* T :+ 1, .] = J(N-1, cols(X), MissingFillValue)
	if (MissingFillValue != .) _editmissing(X2, MissingFillValue)
	return (X2)
}

real matrix _Orthog(real matrix X, real scalar N, real scalar T, real scalar NT, real colvector Complete) {
	pragma unused N
	real matrix X2; real scalar i, j, it, NLeadingVals; real rowvector SumLeadingVals
	X2 = J(NT, j=cols(X), .)
	it = NT
	for (i = NT; i; i = i - T) {
		NLeadingVals = 0; SumLeadingVals = J(1, j, 0)
		for (; it > i - T; it--) {
			if (NLeadingVals)
				X2[it + 1,] = sqrt(1-1/(NLeadingVals+1)) * (X[it, .] - SumLeadingVals / NLeadingVals)
			if (Complete[it]) {
				NLeadingVals++
				SumLeadingVals = SumLeadingVals + X[it,]
			}
		}
	}
	return (X2)
}

// Explode variables to one column per basevar-period
real matrix _Explode(real matrix Base, real scalar N, real scalar T, real scalar NT) {
	real matrix BaseWide, pr, pc, prStep, pcStep; real scalar NumBaseVars, TNumBaseVars
	BaseWide = J(NT, TNumBaseVars = T * (NumBaseVars=cols(Base)), 0)
	pc = TNumBaseVars-NumBaseVars+1 .. TNumBaseVars
	pcStep = J(1, NumBaseVars, NumBaseVars)
	prStep = J(N, 1, 1)
	for (pr = (1::N) :* T; pr[1,1]; pr = pr - prStep) {
		BaseWide[pr, pc] = Base[pr, .]
		pc = pc - pcStep
	}
	return(BaseWide)
}

void __lag(real matrix X, | real scalar lag) {
	if (lag > 0) {
		if (lag == .) lag = 1
		X[|1+lag,. \ .,.|] = X[|.,. \ rows(X)-lag,.|]
		X[|.,. \ lag,.|] = J(lag, cols(X), 0)
	} else if (lag < 0) {
		X[|.,. \ rows(X)+lag,.|] = X[|1-lag,. \ .,.|]
		X[|rows(X)+lag+1,. \ .,.|] = J(-lag, cols(X), 0)
	}
}

real matrix _lag(real matrix X, | real scalar lag) {
	real matrix X2
	__lag(X2 = X, lag)
	return (X2)
}

void svmat(real matrix X, real matrix Y, real matrix Z, real matrix touse, real matrix wt) {
	external real matrix xtabond2_X, xtabond2_Y, xtabond2_Z, xtabond2_wt, xtabond2_sample
	xtabond2_X = X
	xtabond2_Y = Y
	xtabond2_Z = Z
	xtabond2_sample = touse
	xtabond2_wt = wt
}

mata mlib create lxtabond2, dir(PLUS) replace
mata mlib add lxtabond2 *(), dir(PLUS)
mata mlib index
end
